{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b6863e61-0c9e-4bfa-9d07-29acccc53023",
   "metadata": {},
   "source": [
    "**Pre-requisites**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de7f1d83-8c89-4c42-b6cd-2ed641c3f982",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python -m pip install pandas pyarrow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b31a0d1d-f4fb-4ed1-bb36-3d83b091aec0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67077b0b-17ff-4357-9a54-26b0a82d94d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb101f27-d344-4f1c-b093-9b08eab444ca",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "beach_boys"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c45de02-5ba2-4c7a-9068-92707508842a",
   "metadata": {},
   "source": [
    "**How to Reset an Index in a pandas DataFrame With `.reset_index()`**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98823343-7f50-4bbf-a951-ea02b9329a70",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.sort_values(by=\"first_name\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fd5db9f-0180-404b-8dae-099a48203f53",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.sort_values(by=\"first_name\").reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4239013e-1bf8-41c7-9228-00a669e19a57",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = beach_boys.sort_values(by=\"first_name\").reset_index(drop=True)\n",
    "\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bf85bcf0-12dc-4c7d-a25e-5109bb057a76",
   "metadata": {},
   "source": [
    "**Reset an Index Directly With `.index`**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b19f621-f2da-4bbd-80c0-942cb9244d76",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "initials = [\n",
    "    \"BW\",\n",
    "    \"ML\",\n",
    "    \"AJ\",\n",
    "    \"BJ\",\n",
    "    \"CW\",\n",
    "    \"DW\",\n",
    "    \"DM\",\n",
    "    \"RF\",\n",
    "    \"BC\",\n",
    "]\n",
    "\n",
    "beach_boys.index = initials\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25a26ee2-b3bb-4988-9d59-d390ca5e73c9",
   "metadata": {},
   "source": [
    "**Select Rows Using `.loc[]` and `.iloc[]`**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fab39a99-4c2f-40bf-aadf-b57cd937a1a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "initials = [\n",
    "    \"BW\",\n",
    "    \"ML\",\n",
    "    \"AJ\",\n",
    "    \"BJ\",\n",
    "    \"CW\",\n",
    "    \"DW\",\n",
    "    \"DM\",\n",
    "    \"RF\",\n",
    "    \"BC\",\n",
    "]\n",
    "\n",
    "beach_boys.index = initials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3cdb97d-67b2-4655-bc01-5ab5506c83ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.loc[[\"BW\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eddc3706-51f8-425e-8c91-a389bb59ad7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.iloc[[1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11f190ff-400d-42ee-b322-b4b23eb2dc15",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.loc[\"BW\":\"BJ\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab7b2d99-5870-4b45-91d3-266609270c0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.iloc[1:4]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d41359c2-87de-4cdd-809c-9dcf90d52b3d",
   "metadata": {},
   "source": [
    "**Reset an Index Directly With `.set_axis()`**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fdee9b5-3dbd-4e74-9cb1-e426093b4f09",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "beach_boys.set_axis(range(len(beach_boys)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "04358191-5878-4260-ac34-54264c43902d",
   "metadata": {},
   "outputs": [],
   "source": [
    "%timeit -n 1000 beach_boys.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37980a62-1ff1-4d38-956a-d1ca978f7407",
   "metadata": {},
   "outputs": [],
   "source": [
    "%timeit -n 1000 beach_boys.index = pd.RangeIndex(len(beach_boys.index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1cd8531-652a-4f60-83d3-d43dc70ba2ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "%timeit -n 1000 beach_boys.set_axis(range(len(beach_boys)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5619822e-7fb0-4c7b-ab5a-129f3243f8c5",
   "metadata": {},
   "source": [
    "**Restore a Sequential Index**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6e01df44-2cf8-42b2-a12f-f4d581312c39",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2cb0c30d-ca9c-4519-9154-408f65824820",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.drop(labels=[3, 5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b85d26af-e073-44de-aaaf-c1e630b35229",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.drop(labels=[3, 5]).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ddde0dbc-f91f-4dc8-b180-54014b14b8de",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.drop(labels=[3, 5]).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a506c68-e74f-4f35-a837-322946d7a882",
   "metadata": {},
   "source": [
    "**Remove Duplicate Index Values**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2736456-2e6b-4f1e-89b2-cd7400a0bfee",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "guitar_players = beach_boys.query(\"instrument == 'Guitar'\").reset_index(\n",
    "    drop=True\n",
    ")\n",
    "\n",
    "guitar_players"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d18846e7-05ab-4713-b869-a6c95f59bcaa",
   "metadata": {},
   "outputs": [],
   "source": [
    "others = beach_boys.query(\"instrument != 'Guitar'\").reset_index(drop=True)\n",
    "\n",
    "others"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1701609-8a70-4670-b466-8db357ca3182",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_beach_boys = pd.concat([guitar_players, others])\n",
    "all_beach_boys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8deb773b-d40c-4db4-aa3a-dc09991415ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_beach_boys.loc[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c2ea368-cf03-462c-865d-3f1bffebd3c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_beach_boys.iloc[[3]]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a822bb24-af5b-47d1-b51e-0d93a419e95f",
   "metadata": {},
   "source": [
    "> The following code will fail due to the duplicate index.\n",
    "\n",
    "```\n",
    "all_beach_boys.loc[3:4]\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5a7d4ce-74eb-440d-b234-3adaf960dfc4",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_beach_boys.iloc[3:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a379a7f3-6b29-43d8-8893-445e335068ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_beach_boys.sort_index().loc[3:4]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf30ce38-8b34-426b-801d-4329a19b36e8",
   "metadata": {},
   "source": [
    "> The following code will fail due to the duplicate index:\n",
    "\n",
    "```\n",
    "all_beach_boys.filter(items=[1, 3], axis=\"index\")\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eef40ce9-5814-4c53-bb7e-6c216eeed6d8",
   "metadata": {},
   "source": [
    "**Use an Existing Column as an Index**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d89d0233-6caa-4ee4-bae5-80cc85b8238b",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "beach_boys.set_index(\"first_name\").loc[[\"Brian\", \"Carl\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11abd710-8384-40a9-b965-e0a4ac863765",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.index = [f\"Employee_{x + 1}\" for x in range(len(beach_boys))]\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fbba98c-7719-4c85-8700-52fdcf7620e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.loc[[\"Employee_4\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "590ac61d-0c8d-48bd-a5bd-4ebd458f6899",
   "metadata": {},
   "source": [
    "**Align Indexes of Several DataFrames**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de1d3982-e319-4832-8933-4ddc68416679",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales = pd.read_csv(\"week1_record_sales.csv\").set_index(\"index\")\n",
    "\n",
    "week1_sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ddd30d0e-722e-4af5-bfa5-776b52ca3924",
   "metadata": {},
   "outputs": [],
   "source": [
    "week2_sales = pd.read_csv(\"week2_record_sales.csv\").set_index(\"index\")\n",
    "\n",
    "week2_sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76cb534c-b6c6-4293-8e85-5d5615f8eff2",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales.loc[:, \"sales\"] + week2_sales.loc[:, \"sales\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51195c17-b2cd-4a4c-a58b-c91341db7e32",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales.merge(week2_sales, left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "966792a2-b608-4708-b43c-fa6c81cc2853",
   "metadata": {},
   "outputs": [],
   "source": [
    "week2_sales = week2_sales.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74eebf7b-e7d1-40fa-a916-5a66ede9ba43",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales.loc[:, \"sales\"] + week2_sales.loc[:, \"sales\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ce9e3f8-e39b-498f-b807-a4ac36808aa7",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales.merge(week2_sales, left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6b0afa7c-7c21-4786-96dc-d073d70632eb",
   "metadata": {},
   "source": [
    "**Resetting Multi-Indexes**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "84875451-a540-4b99-9e64-7a89979259ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "cereals = pd.read_csv(\"cereals.csv\").convert_dtypes(dtype_backend=\"pyarrow\")\n",
    "cereals.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf46a23d-a713-442a-8d1f-ff0d7ade6ad9",
   "metadata": {},
   "outputs": [],
   "source": [
    "cereals.pivot_table(\n",
    "    values=\"fiber\",\n",
    "    index=[\"manufacturer\", \"type\"],\n",
    "    aggfunc=\"mean\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b800c41b-a419-4ca9-9f44-eb2e142994b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "cereals.pivot_table(\n",
    "    values=\"fiber\",\n",
    "    index=[\"manufacturer\", \"type\"],\n",
    "    aggfunc=\"mean\",\n",
    ").index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "809b06cb-20cd-43c7-acfd-54dcff8a72b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "cereals.pivot_table(\n",
    "    values=\"fiber\", index=[\"manufacturer\", \"type\"], aggfunc=\"mean\"\n",
    ").reset_index(level=1, drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54fe7d99-1cd6-4a14-8b42-af794cd52766",
   "metadata": {},
   "outputs": [],
   "source": [
    "cereals.pivot_table(\n",
    "    values=\"fiber\",\n",
    "    index=[\"manufacturer\", \"type\"],\n",
    "    aggfunc=\"mean\",\n",
    ").reset_index()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
